package udacity.storm;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import twitter4j.conf.ConfigurationBuilder;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.StallWarning;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.RedisConnection;
/**
* This is a basic example of a Storm topology.
*/
public class TweetTopology {
/**
* A spout that uses the Twitter streaming API to continuously
* fetch tweets
*/
public static class TweetSpout extends BaseRichSpout
{
// Twitter API authentication credentials
String custkey, custsecret;
String accesstoken, accesssecret;
// To output tuples from spout to the next stage bolt
SpoutOutputCollector collector;
// Twitter4j - twitter stream to get tweets
TwitterStream twitterStream;
// Shared queue for buffering tweets received from the stream
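// (twitter4j delivers statuses on its own listener thread while Storm
// calls nextTuple() on the spout's executor thread; this thread-safe
// queue hands tweets between the two)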
LinkedBlockingQueue<String> queue = null;
// Listener class that receives tweets from the twitter4j stream
private class TweetListener implements StatusListener {
// Implement the callback function when a tweet arrives
@Override
public void onStatus(Status status)
{
// add the tweet into the queue buffer
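// offer() is non-blocking: if the bounded queue is full, the tweet is
// simply dropped rather than stalling the twitter4j listener thread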
queue.offer(status.getText());
}
@Override
public void onDeletionNotice(StatusDeletionNotice sdn)
{
}
@Override
public void onTrackLimitationNotice(int i)
{
}
@Override
public void onScrubGeo(long l, long l1)
{
}
@Override
public void onStallWarning(StallWarning warning)
{
}
@Override
public void onException(Exception e)
{
e.printStackTrace();
}
}
/**
* Constructor for tweet spout that accepts the credentials
*/
public TweetSpout(
String key,
String secret,
String token,
String tokensecret)
{
custkey = key;
custsecret = secret;
accesstoken = token;
accesssecret = tokensecret;
}
@Override
public void open(
Map map,
TopologyContext topologyContext,
SpoutOutputCollector spoutOutputCollector)
{
// create a bounded buffer (capacity 1000) to hold incoming tweets
queue = new LinkedBlockingQueue<String>(1000);
// save the output collector for emitting tuples
collector = spoutOutputCollector;
// build the config with credentials for twitter 4j
ConfigurationBuilder config =
new ConfigurationBuilder()
.setOAuthConsumerKey(custkey)
.setOAuthConsumerSecret(custsecret)
.setOAuthAccessToken(accesstoken)
.setOAuthAccessTokenSecret(accesssecret);
// create the twitter stream factory with the config
TwitterStreamFactory fact =
new TwitterStreamFactory(config.build());
// get an instance of twitter stream
twitterStream = fact.getInstance();
// provide the handler for twitter stream
twitterStream.addListener(new TweetListener());
// start the sampling of tweets
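// sample() returns immediately and delivers a small random sample of
// public statuses to the listener on a background thread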
twitterStream.sample();
}
@Override
public void nextTuple()
{
// try to pick a tweet from the buffer
String ret = queue.poll();
// if no tweet is available, wait for 50 ms and return
if (ret==null)
{
Utils.sleep(50);
return;
}
// now emit the tweet to next stage bolt
collector.emit(new Values(ret));
}
@Override
public void close()
{
// shutdown the stream - when we are going to exit
twitterStream.shutdown();
}
/**
* Component specific configuration
*/
@Override
public Map<String, Object> getComponentConfiguration()
{
// create the component config
Config ret = new Config();
// set the parallelism for this spout to be 1
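// (the spout holds a single Twitter connection; running more than one
// task would open duplicate streams and emit duplicate tweets)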
ret.setMaxTaskParallelism(1);
return ret;
}
@Override
public void declareOutputFields(
OutputFieldsDeclarer outputFieldsDeclarer)
{
// tell storm the schema of the output tuple for this spout
// tuple consists of a single column called 'tweet'
outputFieldsDeclarer.declare(new Fields("tweet"));
}
}
/**
* A bolt that parses the tweet into words
*/
public static class ParseTweetBolt extends BaseRichBolt
{
// To output tuples from this bolt to the count bolt
OutputCollector collector;
@Override
public void prepare(
Map map,
TopologyContext topologyContext,
OutputCollector outputCollector)
{
// save the output collector for emitting tuples
collector = outputCollector;
}
@Override
public void execute(Tuple tuple)
{
// get the 1st column 'tweet' from tuple
String tweet = tuple.getString(0);
// provide the delimiters for splitting the tweet
String delims = "[ .,?!]+";
// now split the tweet into tokens
String[] tokens = tweet.split(delims);
// for each token/word, emit it; skip the empty leading token that
// split() produces when the tweet starts with a delimiter
for (String token: tokens) {
if (!token.isEmpty()) {
collector.emit(new Values(token));
}
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer)
{
// tell storm the schema of the output tuple for this bolt
// tuple consists of a single column called 'tweet-word'
declarer.declare(new Fields("tweet-word"));
}
}
/**
* A bolt that counts the words that it receives
*/
static class CountBolt extends BaseRichBolt {
// To output tuples from this bolt to the next stage bolts, if any
private OutputCollector collector;
// Map to store the count of the words
private Map<String, Integer> countMap;
@Override
public void prepare(
Map map,
TopologyContext topologyContext,
OutputCollector outputCollector)
{
// save the collector for emitting tuples
collector = outputCollector;
// create and initialize the map
countMap = new HashMap<String, Integer>();
}
@Override
public void execute(Tuple tuple)
{
// get the word from the 1st column of incoming tuple
String word = tuple.getString(0);
// look up the current count for the word, treating absence as zero
Integer count = countMap.get(word);
if (count == null) {
count = 0;
}
// increment the count and save it to the map
count = count + 1;
countMap.put(word, count);
// emit the word and its updated count
collector.emit(new Values(word, count));
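// note: a fresh running total is emitted on every occurrence, so
// downstream consumers always see the latest count for each word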
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer)
{
// tell storm the schema of the output tuple for this bolt:
// two columns called 'word' and 'count'
outputFieldsDeclarer.declare(new Fields("word","count"));
}
}
/**
* A bolt that prints the word and count to redis
*/
static class ReportBolt extends BaseRichBolt
{
// placeholder to keep the connection to redis
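// (transient because a live connection cannot be serialized with the
// bolt; it is created in prepare() on the worker instead)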
transient RedisConnection<String,String> redis;
@Override
public void prepare(
Map map,
TopologyContext topologyContext,
OutputCollector outputCollector)
{
// instantiate a redis connection
RedisClient client = new RedisClient("localhost",6379);
// initiate the actual connection
redis = client.connect();
}
@Override
public void execute(Tuple tuple)
{
// access the first column 'word'
String word = tuple.getStringByField("word");
// access the second column 'count'
Integer count = tuple.getIntegerByField("count");
// publish the word and its count to the 'WordCountTopology' channel,
// encoded as "word|count"
redis.publish("WordCountTopology", word + "|" + count);
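// a dashboard or any redis client can consume these updates, e.g.:
//   redis-cli SUBSCRIBE WordCountTopology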
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer)
{
// nothing to add - since it is the final bolt
}
}
public static void main(String[] args) throws Exception
{
// create the topology
TopologyBuilder builder = new TopologyBuilder();
/*
* In order to create the spout, you need Twitter credentials.
* If you want to use the Twitter firehose/Tweet stream for your idea,
* create a set of credentials by following the instructions at
*
* https://dev.twitter.com/discussions/631
*
*/
// now create the tweet spout with the credentials
// (replace the bracketed placeholders with your own Twitter credentials)
TweetSpout tweetSpout = new TweetSpout(
"[Your customer key]",
"[Your secret key]",
"[Your access token]",
"[Your access secret]"
);
// attach the tweet spout to the topology - parallelism of 1
builder.setSpout("tweet-spout", tweetSpout, 1);
//*********************************************************************
// Complete the Topology.
// Part 1: attach the parse tweet bolt, parallelism of 10 (what grouping is needed?)
// Part 2: attach the count bolt, parallelism of 15 (what grouping is needed?)
// Part 3: attach the report bolt, parallelism of 1 (what grouping is needed?)
// Submit and run the topology.
//*********************************************************************
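// One possible completion (a sketch, not the only valid wiring):
// tweets can go to any parser task, so shuffle grouping works for Part 1;
// Part 2 needs fields grouping on "tweet-word" so every occurrence of a
// word reaches the same counter task; Part 3 can use global grouping to
// funnel all counts into the single reporter.
builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout");
builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word"));
builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt");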
// create the default config object
Config conf = new Config();
// set the config in debugging mode
conf.setDebug(true);
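// note: debug mode logs every tuple that is emitted, which is very
// verbose; turn it off for anything beyond small test runs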
if (args != null && args.length > 0) {
// run it in a live cluster
// set the number of workers for running all spout and bolt tasks
conf.setNumWorkers(3);
// create the topology and submit with config
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} else {
// run it in a simulated local cluster
// set the number of threads to run - similar to setting number of workers in live cluster
conf.setMaxTaskParallelism(3);
// create the local cluster instance
LocalCluster cluster = new LocalCluster();
// submit the topology to the local cluster
cluster.submitTopology("tweet-word-count", conf, builder.createTopology());
// let the topology run for 30 seconds. note topologies never terminate!
Utils.sleep(30000);
// now kill the topology
cluster.killTopology("tweet-word-count");
// we are done, so shutdown the local cluster
cluster.shutdown();
}
}
}